
# Column names applied to the per-year table of percentile income bounds that
# is assembled further down in this script: the year followed by five bounds.
the.col.names <- c(
  "year",
  "inc_ul_quin1",
  "inc_ul_quin2",
  "inc_ul_quin3",
  "inc_ul_quin4",
  "inc_ll_95"
)

# Process the actual Swedish data, starting with the 1954-2010 period.
# Two raw tables are loaded: the per-band distribution (one row per year and
# income band) and the per-year totals.
bands.csv <- "../source_data/se/Sweden - Income Tables-1_bands.csv"
totals.csv <- "../source_data/se/Sweden - Income Tables-1_totals.csv"
sweden.inc.dists <- read.csv(file = bands.csv)
sweden.inc.totals <- read.csv(file = totals.csv)

# Placeholder column; the per-year mean income is filled in later once the
# yearly earner totals are known.
sweden.inc.totals$inc_mean <- NA

# Recode rows without a reported income to the degenerate band inc_ll = 0,
# inc_ul = 0.  A row qualifies when its lower bound is the text
# "None reported", or when its lower bound is 0 and its upper bound is missing.
# NOTE(review): if read.csv() returned these columns as factors, assigning 0
# only works when "0" is already a level -- confirm for the R version in use.
flagged.none <- sweden.inc.dists$Lower.Bound == "None reported"
zero.and.open <- sweden.inc.dists$Lower.Bound == 0 &
  is.na(sweden.inc.dists$Upper.Bound)
no.inc.idxs <- which(flagged.none | zero.and.open)
sweden.inc.dists$Lower.Bound[no.inc.idxs] <- 0
sweden.inc.dists$Upper.Bound[no.inc.idxs] <- 0

# Derive the working columns: a short `year` alias plus numeric versions of the
# band bounds (going through character so factor columns convert by label).
bound.to.number <- function(bound) {
  as.numeric(as.character(bound))
}
sweden.inc.dists$year <- sweden.inc.dists$Year.of.Income
sweden.inc.dists$inc_ll <- bound.to.number(sweden.inc.dists$Lower.Bound)
sweden.inc.dists$inc_ul <- bound.to.number(sweden.inc.dists$Upper.Bound)

# Sort by year, then by lower and upper bound, so that bands within a year
# appear in ascending order.
band.order <- order(
  sweden.inc.dists$year,
  sweden.inc.dists$inc_ll,
  sweden.inc.dists$inc_ul
)
sweden.inc.dists <- sweden.inc.dists[band.order, ]

# Correct for the (temporary) introduction of about 1 million 15-year-olds to
# the bottom income band during 1992-1998.  The young.adjustments figures come
# from the same yearbooks and give the number of 15-year-old bottom earners for
# each of those years, in order.
young.adjustments <- c(1003697, 903072, 845975, 911579, 878800, 829296, 886110)
young.years <- 1992 + seq_along(young.adjustments) - 1
for (k in seq_along(young.adjustments)) {
  # which() already returns ascending row positions; the data were sorted by
  # year and bounds beforehand.
  year.rows <- which(sweden.inc.dists$year == young.years[k])
  # The adjustment is applied to the second row of the year.
  # NOTE(review): presumably the first row is the recoded no-income (0, 0)
  # band and the second is the bottom income band -- verify against the data.
  target.row <- year.rows[2]
  sweden.inc.dists$Number.of.Earners[target.row] <-
    sweden.inc.dists$Number.of.Earners[target.row] - young.adjustments[k]
}

# For every year 1954-2010 compute:
#   * pop_pc   -- each band's share of that year's earners, in percent;
#   * inc_mean -- that year's total earnings divided by its total earners.
sweden.inc.dists$pop_pc <- NA
for (yr in seq(1954, 2010, by = 1)) {
  yr.idxs <- which(sweden.inc.dists$year == yr)
  yr.earners <- sweden.inc.dists$Number.of.Earners[yr.idxs]
  total.pop <- sum(yr.earners)
  sweden.inc.dists$pop_pc[yr.idxs] <- 100 * yr.earners / total.pop

  # Matching row(s) in the totals table; nothing is written when the year is
  # absent there.
  totals.idx <- which(sweden.inc.totals$year == yr)
  sweden.inc.totals$inc_mean[totals.idx] <-
    sweden.inc.totals$Total.Earned[totals.idx] / total.pop
}
# Discard rows whose original upper bound is missing.  This happens after the
# population shares were computed, so those rows still counted towards each
# year's total.
has.upper.bound <- !is.na(sweden.inc.dists$Upper.Bound)
sweden.inc.dists <- sweden.inc.dists[has.upper.bound, , drop = FALSE]

# Build one row of percentile income bounds per year.
# NOTE(review): get.pctile.inc.bounds() is defined outside this section; it is
# expected to return five values lining up with the.col.names[-1] -- confirm.
band.cols <- c("inc_ll", "inc_ul", "pop_pc")
bounds <- data.frame()
for (yr in seq(1954, 2010, by = 1)) {
  yr.rows <- which(sweden.inc.dists$year == yr & !is.na(sweden.inc.dists$inc_ll))
  inc.dist <- sweden.inc.dists[yr.rows, band.cols, drop = FALSE]
  inc.bounds <- get.pctile.inc.bounds(inc.dist)
  # The year goes first, followed by that year's bounds.
  new.row <- c(yr, inc.bounds)
  bounds <- rbind(bounds, new.row)
}
names(bounds) <- the.col.names
# Attach each year's mean income to the percentile bounds and write the result
# out as a Stata file.
# The join key is passed to merge() explicitly.  It used to sit inside the
# subset() call because of a misplaced parenthesis, where it was silently
# ignored and merge() fell back on joining by all shared column names.
year.means <- subset(sweden.inc.totals, select = c("year", "inc_mean"))
inc.out <- merge(bounds, year.means, by = "year")
# NOTE(review): write.dta() is not loaded in this section -- presumably
# foreign::write.dta attached by the calling script; confirm.
write.dta(inc.out, "../generated_data/swedenincomes_19542010_processed.dta")
